# OTU table
otu_table = read_tsv("../data/otu-table.tsv", skip = 1)
otu_id = otu_table$`#OTU ID`
otu_table = data.frame(otu_table[, -1], check.names = FALSE, row.names = otu_id)

# Taxonomy table
tax = read_tsv("../data/taxonomy.tsv")
otu_id = tax$`Feature ID`
tax = data.frame(tax[, - c(1, 3)], row.names = otu_id)
tax = tax %>% separate(col = Taxon, 
                       into = c("Kingdom", "Phylum", "Class", "Order", 
                                "Family", "Genus", "Species"),
                       sep = ";")
for (i in 1:ncol(tax)) {
  tax[, i] = sapply(tax[, i], function(x) str_split(x, "__")[[1]][2])
}
tax = as.matrix(tax)
tax[tax == ""] = NA

# Tree
tree = read_tree("../data/tree.nwk")

# Meta data
meta_data = read_tsv("../data/metadata.tsv")
meta_data = meta_data %>%
  mutate_if(is.character, as.factor)
meta_data$sampleid = as.character(meta_data$sampleid)
meta_data$caregiver_stress_level = factor(meta_data$caregiver_stress_level,
                                          levels = c("Low",  "Medium",  "High"))
meta_data$depression_level = factor(meta_data$depression_level,
                                    levels = c("Low",  "Medium",  "High"))
meta_data$hostility_level = factor(meta_data$hostility_level,
                                   levels = c("Low",  "Medium",  "High"))
meta_data$das_level = factor(meta_data$das_level,
                             levels = c("Low",  "Medium",  "High"))
meta_data$metabolic_syndrome_level = factor(meta_data$metabolic_syndrome_level,
                                            levels = c("Low",  "Medium",  "High"))

OTU = otu_table(otu_table, taxa_are_rows = TRUE)
META = sample_data(meta_data)
sample_names(META) = meta_data$sampleid
TAX = tax_table(tax)
otu_data = phyloseq(OTU, TAX, META, tree)

1. Analyses at genus level

# Aggregate taxa
genus_data = aggregate_taxa(otu_data, "Genus")
genus_data2 = merge_taxa2(genus_data, pattern = "\\Clostridium", name = "Clostridium")
genus_rarefied = rarefy_even_depth(genus_data2, rngseed = 1, 
                                   sample.size = 0.9 * min(sample_sums(genus_data2)), 
                                   replace = FALSE)

1.1 Alpha diversity: Shannon’s diversity index

P-value is obtained by Kruskal-Wallis Rank Sum Test.

d_alpha = alpha(genus_rarefied, index = "diversity_shannon")
df_alpha = data.frame(d = d_alpha$diversity_shannon,
                      meta(genus_rarefied) %>% 
                        dplyr::select(alcohol, 
                                      caregiver_stress_level:metabolic_syndrome_level))
covariates = colnames(df_alpha)[-1]
labels = c("Alcohol Use", "Caregiver Stress Level", "Depression Level",
           "Hostility Level", "DAS Level", "Metabolic Syndrome Level")

for (i in seq_along(covariates)) {
  cat("\n \n \n")
  covariate = covariates[i]
  label = labels[i]
  tformula = formula(paste0("d ~ ", covariate)) 
  p_val = kruskal.test(tformula, data = df_alpha)$p.value
  df_ann = data.frame(x = 2.2, y = 1.01 * max(df_alpha$d), p = p_val)
  df_ann = df_ann %>% 
    mutate(label = paste0("p = ", signif(p, 2)))
  
  df_fig = df_alpha %>% 
    transmute(x = !!as.name(covariate), y = d)
  df_fig = df_fig[complete.cases(df_fig), ]
  
  p = df_fig %>%
    ggplot(aes(x = x, y = y)) + 
    geom_violin(trim = FALSE, aes(fill = x)) +
    geom_boxplot(width = 0.1, fill = "white") +
    scale_fill_discrete(name = NULL) +
    geom_label(data = df_ann, aes(x = x, y = y, label = label),
               size = 4, vjust = -0.5, hjust = 0, color = "orange") +
    labs(x = NULL, y = "Shannon’s diversity index", title = label) + 
    theme_bw() + 
    theme(strip.background = element_rect(fill = "white"), 
          legend.position = "bottom",
          plot.title = element_text(hjust = 0.5))
  print(p)
  cat("\n \n \n")
  fig_path1 = paste0("../images/diversity/genus_alpha_", covariate, ".jpeg")
  fig_path2 = paste0("../images/diversity/genus_alpha_", covariate, ".pdf")
  ggsave(filename = fig_path1, plot = p, height = 5, width = 6.25, 
         units = 'in', dpi = 300) 
  ggsave(filename = fig_path2, plot = p, height = 5, width = 6.25) 
}

1.2 Beta diversity: Bray-Curtis dissimilarity

P-values are obtained by Permutational Multivariate Analysis of Variance (PERMANOVA) and Permutational Analysis of Multivariate Dispersion (PERMDISP).

for (i in seq_along(covariates)) {
  cat("\n \n \n")
  covariate = covariates[i]
  label = labels[i]
  pseq = genus_rarefied
  sample_data(pseq)$covariate = meta(pseq)[, covariate]
  pseq_subset = subset_samples(pseq, !is.na(covariate))
  
  set.seed(123)
  # PERMANOVA
  permanova = adonis(t(abundances(pseq_subset)) ~ covariate, 
                     data = meta(pseq_subset), 
                     permutations = 999, method = "bray")$aov.tab
  # PERMDISP
  dis = vegdist(t(abundances(pseq_subset)), method = "bray")
  groups = sample_data(pseq_subset)$covariate
  mod = betadisper(d = dis, group = groups, type = "median")

  # Draw the Plot
  labs = paste0("PCoA", 1:2, " (", signif(100 * mod$eig / sum(mod$eig), 3), "%)")
  # brewer.pal(n = 8, name = "Accent")
  plot(mod, pch = 15:16, cex.lab = 1.25, cex = 1, 
       main = label, 
       xlab = labs[1], ylab = labs[2], ylim = c(-0.4, 0.6), xaxt = "n",
       col = c("#7FC97F", "#BEAED4"), sub = NULL,
       hull = FALSE, ellipse = TRUE, conf = 0.68) # 68% data coverage for data ellipses
  axis(1, at = round(seq(-0.6, 0.6, by = 0.2), 1), las = 1)
  legend(x = 0.5, y = 0.2, legend = unique(groups),
         col = c("#7FC97F", "#BEAED4"), pch = 15:16, cex = 0.8)
  legend(x = 0.2, y = 0.6, cex = 0.7,
         legend = c(paste0("p (PERMANOVA) = ", signif(permanova$`Pr(>F)`[1], 2)),
                    paste0("p (PERMDISP) = ", signif(permutest(mod)$tab$`Pr(>F)`[1], 2))))
  cat("\n \n \n")
  # Export
  fig_path1 = paste0("../images/diversity/genus_beta_", covariate, ".jpeg")
  jpeg(filename = fig_path1, height = 5, width = 6.25, res = 300, units = "in")
  plot(mod, pch = 15:16, cex.lab = 1.25, cex = 1, 
       main = label, 
       xlab = labs[1], ylab = labs[2], ylim = c(-0.4, 0.6), xaxt = "n",
       col = c("#7FC97F", "#BEAED4"), sub = NULL,
       hull = FALSE, ellipse = TRUE, conf = 0.68) # 68% data coverage for data ellipses
  axis(1, at = round(seq(-0.6, 0.6, by = 0.2), 1), las = 1)
  legend(x = 0.5, y = 0.2, legend = unique(groups),
         col = c("#7FC97F", "#BEAED4"), pch = 15:16, cex = 0.8)
  legend(x = 0.2, y = 0.6, cex = 0.7,
         legend = c(paste0("p (PERMANOVA) = ", signif(permanova$`Pr(>F)`[1], 2)),
                    paste0("p (PERMDISP) = ", signif(permutest(mod)$tab$`Pr(>F)`[1], 2))))
  dev.off()
  
  fig_path2 = paste0("../images/diversity/genus_beta_", covariate, ".pdf")
  pdf(file = fig_path2, height = 5, width = 6.25)
  plot(mod, pch = 15:16, cex.lab = 1.25, cex = 1, 
       main = label, 
       xlab = labs[1], ylab = labs[2], ylim = c(-0.4, 0.6), xaxt = "n",
       col = c("#7FC97F", "#BEAED4"), sub = NULL,
       hull = FALSE, ellipse = TRUE, conf = 0.68) # 68% data coverage for data ellipses
  axis(1, at = round(seq(-0.6, 0.6, by = 0.2), 1), las = 1)
  legend(x = 0.5, y = 0.2, legend = unique(groups),
         col = c("#7FC97F", "#BEAED4"), pch = 15:16, cex = 0.8)
  legend(x = 0.2, y = 0.6, cex = 0.7,
         legend = c(paste0("p (PERMANOVA) = ", signif(permanova$`Pr(>F)`[1], 2)),
                    paste0("p (PERMDISP) = ", signif(permutest(mod)$tab$`Pr(>F)`[1], 2))))
  dev.off()
}

2. Analyses at species level

# Aggregate taxa
species_data = aggregate_taxa(otu_data, "Species")
species_rarefied = rarefy_even_depth(species_data, rngseed = 1, 
                                     sample.size = 0.9 * min(sample_sums(species_data)), 
                                     replace = FALSE)

2.1 Alpha diversity: Shannon’s diversity index

P-value is obtained by Kruskal-Wallis Rank Sum Test.

d_alpha = alpha(species_rarefied, index = "diversity_shannon")
df_alpha = data.frame(d = d_alpha$diversity_shannon,
                      meta(species_rarefied) %>% 
                        dplyr::select(alcohol, 
                                      caregiver_stress_level:metabolic_syndrome_level))
covariates = colnames(df_alpha)[-1]
labels = c("Alcohol Use", "Caregiver Stress Level", "Depression Level",
           "Hostility Level", "DAS Level", "Metabolic Syndrome Level")

for (i in seq_along(covariates)) {
  cat("\n \n \n")
  covariate = covariates[i]
  label = labels[i]
  tformula = formula(paste0("d ~ ", covariate)) 
  p_val = kruskal.test(tformula, data = df_alpha)$p.value
  df_ann = data.frame(x = 2.2, y = 1.01 * max(df_alpha$d), p = p_val)
  df_ann = df_ann %>% 
    mutate(label = paste0("p = ", signif(p, 2)))
  
  df_fig = df_alpha %>% 
    transmute(x = !!as.name(covariate), y = d)
  df_fig = df_fig[complete.cases(df_fig), ]
  
  p = df_fig %>%
    ggplot(aes(x = x, y = y)) + 
    geom_violin(trim = FALSE, aes(fill = x)) +
    geom_boxplot(width = 0.1, fill = "white") +
    scale_fill_discrete(name = NULL) +
    geom_label(data = df_ann, aes(x = x, y = y, label = label),
               size = 4, vjust = -0.5, hjust = 0, color = "orange") +
    labs(x = NULL, y = "Shannon’s diversity index", title = label) + 
    theme_bw() + 
    theme(strip.background = element_rect(fill = "white"), 
          legend.position = "bottom",
          plot.title = element_text(hjust = 0.5))
  print(p)
  cat("\n \n \n")
  fig_path1 = paste0("../images/diversity/species_alpha_", covariate, ".jpeg")
  fig_path2 = paste0("../images/diversity/species_alpha_", covariate, ".pdf")
  ggsave(filename = fig_path1, plot = p, height = 5, width = 6.25, 
         units = 'in', dpi = 300) 
  ggsave(filename = fig_path2, plot = p, height = 5, width = 6.25) 
}

2.2 Beta diversity: Bray-Curtis dissimilarity

P-values are obtained by Permutational Multivariate Analysis of Variance (PERMANOVA) and Permutational Analysis of Multivariate Dispersion (PERMDISP).

for (i in seq_along(covariates)) {
  cat("\n \n \n")
  covariate = covariates[i]
  label = labels[i]
  pseq = species_rarefied
  sample_data(pseq)$covariate = meta(pseq)[, covariate]
  pseq_subset = subset_samples(pseq, !is.na(covariate))
  
  set.seed(123)
  # PERMANOVA
  permanova = adonis(t(abundances(pseq_subset)) ~ covariate, 
                     data = meta(pseq_subset), 
                     permutations = 999, method = "bray")$aov.tab
  # PERMDISP
  dis = vegdist(t(abundances(pseq_subset)), method = "bray")
  groups = sample_data(pseq_subset)$covariate
  mod = betadisper(d = dis, group = groups, type = "median")

  # Draw the Plot
  labs = paste0("PCoA", 1:2, " (", signif(100 * mod$eig / sum(mod$eig), 3), "%)")
  # brewer.pal(n = 8, name = "Accent")
  plot(mod, pch = 15:16, cex.lab = 1.25, cex = 1, 
       main = label, 
       xlab = labs[1], ylab = labs[2], ylim = c(-0.4, 0.6), xaxt = "n",
       col = c("#7FC97F", "#BEAED4"), sub = NULL,
       hull = FALSE, ellipse = TRUE, conf = 0.68) # 68% data coverage for data ellipses
  axis(1, at = round(seq(-0.6, 0.6, by = 0.2), 1), las = 1)
  legend(x = 0.5, y = 0.2, legend = unique(groups),
         col = c("#7FC97F", "#BEAED4"), pch = 15:16, cex = 0.8)
  legend(x = 0.2, y = 0.6, cex = 0.7,
         legend = c(paste0("p (PERMANOVA) = ", signif(permanova$`Pr(>F)`[1], 2)),
                    paste0("p (PERMDISP) = ", signif(permutest(mod)$tab$`Pr(>F)`[1], 2))))
  cat("\n \n \n")
  # Export
  fig_path1 = paste0("../images/diversity/species_beta_", covariate, ".jpeg")
  jpeg(filename = fig_path1, height = 5, width = 6.25, res = 300, units = "in")
  plot(mod, pch = 15:16, cex.lab = 1.25, cex = 1, 
       main = label, 
       xlab = labs[1], ylab = labs[2], ylim = c(-0.4, 0.6), xaxt = "n",
       col = c("#7FC97F", "#BEAED4"), sub = NULL,
       hull = FALSE, ellipse = TRUE, conf = 0.68) # 68% data coverage for data ellipses
  axis(1, at = round(seq(-0.6, 0.6, by = 0.2), 1), las = 1)
  legend(x = 0.5, y = 0.2, legend = unique(groups),
         col = c("#7FC97F", "#BEAED4"), pch = 15:16, cex = 0.8)
  legend(x = 0.2, y = 0.6, cex = 0.7,
         legend = c(paste0("p (PERMANOVA) = ", signif(permanova$`Pr(>F)`[1], 2)),
                    paste0("p (PERMDISP) = ", signif(permutest(mod)$tab$`Pr(>F)`[1], 2))))
  dev.off()
  
  fig_path2 = paste0("../images/diversity/species_beta_", covariate, ".pdf")
  pdf(file = fig_path2, height = 5, width = 6.25)
  plot(mod, pch = 15:16, cex.lab = 1.25, cex = 1, 
       main = label, 
       xlab = labs[1], ylab = labs[2], ylim = c(-0.4, 0.6), xaxt = "n",
       col = c("#7FC97F", "#BEAED4"), sub = NULL,
       hull = FALSE, ellipse = TRUE, conf = 0.68) # 68% data coverage for data ellipses
  axis(1, at = round(seq(-0.6, 0.6, by = 0.2), 1), las = 1)
  legend(x = 0.5, y = 0.2, legend = unique(groups),
         col = c("#7FC97F", "#BEAED4"), pch = 15:16, cex = 0.8)
  legend(x = 0.2, y = 0.6, cex = 0.7,
         legend = c(paste0("p (PERMANOVA) = ", signif(permanova$`Pr(>F)`[1], 2)),
                    paste0("p (PERMDISP) = ", signif(permutest(mod)$tab$`Pr(>F)`[1], 2))))
  dev.off()
}

Session information

sessionInfo()
R version 4.1.1 (2021-08-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur 10.16

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] kableExtra_1.3.4   knitr_1.33         qwraps2_0.5.2      magrittr_2.0.1    
 [5] compositions_2.0-2 vegan_2.5-7        lattice_0.20-44    permute_0.9-5     
 [9] microbiome_1.14.0  phyloseq_1.36.0    forcats_0.5.1      stringr_1.4.0     
[13] dplyr_1.0.7        purrr_0.3.4        tidyr_1.1.3        tibble_3.1.3      
[17] ggplot2_3.3.5      tidyverse_1.3.1    openxlsx_4.2.4     readr_2.0.1       

loaded via a namespace (and not attached):
 [1] Rtsne_0.15             colorspace_2.0-2       ellipsis_0.3.2        
 [4] XVector_0.32.0         fs_1.5.0               rstudioapi_0.13       
 [7] farver_2.1.0           bit64_4.0.5            fansi_0.5.0           
[10] lubridate_1.7.10       xml2_1.3.2             codetools_0.2-18      
[13] splines_4.1.1          robustbase_0.93-8      ade4_1.7-17           
[16] jsonlite_1.7.2         broom_0.7.9            cluster_2.1.2         
[19] dbplyr_2.1.1           compiler_4.1.1         httr_1.4.2            
[22] backports_1.2.1        assertthat_0.2.1       Matrix_1.3-4          
[25] cli_3.0.1              htmltools_0.5.1.1      tools_4.1.1           
[28] igraph_1.2.6           gtable_0.3.0           glue_1.4.2            
[31] GenomeInfoDbData_1.2.6 reshape2_1.4.4         Rcpp_1.0.7            
[34] Biobase_2.52.0         cellranger_1.1.0       vctrs_0.3.8           
[37] Biostrings_2.60.2      rhdf5filters_1.4.0     multtest_2.48.0       
[40] ape_5.5                svglite_2.0.0          nlme_3.1-152          
[43] iterators_1.0.13       tensorA_0.36.2         xfun_0.25             
[46] rvest_1.0.1            lifecycle_1.0.0        DEoptimR_1.0-9        
[49] zlibbioc_1.38.0        MASS_7.3-54            scales_1.1.1          
[52] vroom_1.5.4            hms_1.1.0              parallel_4.1.1        
[55] biomformat_1.20.0      rhdf5_2.36.0           yaml_2.2.1            
[58] stringi_1.7.3          highr_0.9              S4Vectors_0.30.0      
[61] foreach_1.5.1          BiocGenerics_0.38.0    zip_2.2.0             
[64] GenomeInfoDb_1.28.1    rlang_0.4.11           pkgconfig_2.0.3       
[67] systemfonts_1.0.2      bitops_1.0-7           evaluate_0.14         
[70] Rhdf5lib_1.14.2        labeling_0.4.2         bit_4.0.4             
[73] tidyselect_1.1.1       plyr_1.8.6             R6_2.5.0              
[76] IRanges_2.26.0         generics_0.1.0         DBI_1.1.1             
[79] pillar_1.6.2           haven_2.4.3            withr_2.4.2           
[82] mgcv_1.8-36            survival_3.2-12        RCurl_1.98-1.3        
[85] bayesm_3.1-4           modelr_0.1.8           crayon_1.4.1          
[88] utf8_1.2.2             tzdb_0.1.2             rmarkdown_2.10        
[91] grid_4.1.1             readxl_1.3.1           data.table_1.14.0     
[94] webshot_0.5.2          reprex_2.0.1           digest_0.6.27         
[97] stats4_4.1.1           munsell_0.5.0          viridisLite_0.4.0